/***************************************************
This program takes the earnings file and merges it
onto the wide enrollment file, then totals up all
earnings for each enrollment record.
****************************************************/

%macro earnmerge; 
        
/* Subsetting IF for use inside a data step: keeps only the
   observations whose pik has '2' as its first character. */
%macro sample_restriction;
    if first(pik) = '2' ;
%mend sample_restriction; 

/* Systems whose wide enrollment files are processed below.
   NOTE(review): the original comment called this "just for testing";
   confirm that this is the intended list. */
%let systems = cdhe thecb cuny suny odhe;

/* Print the contents listing of the wide enrollment file of each system. */
%do jj = 1 %to %sysfunc(countw(&systems., %str( )));
    %let currsys = %scan(&systems., &jj.);

    title "Wide Enrollment &currsys.";
    proc contents data=OUTPUTS.enrollment_wide_&currsys.;
    run;
%end;

/* Combine the wide enrollment files of all systems into one data set,
   interleaved by pik (every input has to be in pik order already). */
data enrollment;
    set
        %do jj = 1 %to %sysfunc(countw(&systems., %str( )));
            %let currsys = %scan(&systems., &jj.);
            OUTPUTS.enrollment_wide_&currsys.
        %end;
    ;
    by pik;
run;
        
/* Attach dob from the ICF to each enrollment record, fill one-quarter
   gaps in the enrollment indicators and derive first_enr / last_enr.
   There is no explicit OUTPUT statement, so every kept observation is
   written to both outputs: ALL (all variables except ii) and
   EARN_MERGE (only the key variables listed in KEEP=). */
data all (drop = ii) 
        earn_merge (keep= pik opeid deglevl_code state degcip_2dig) ;
        merge enrollment (in=a) ICF.icf_us (in=b keep=pik dob)  ;
        by pik; 

        /* keep only piks that appear in the enrollment data */
        if a ; 
        /* Fill gaps: a 1,x,1 pattern over three consecutive quarters
           of enr becomes 1,1,1. */

         array enr{21:134} enr21-enr134;     
         /* starting values; they are also the final values when the
            loop below never updates them */
         first_enr = 134 ; 
         last_enr = 22 ; 
        /* 22..133 keeps ii-1 and ii+1 inside the array bounds.
           NOTE(review): with these bounds first_enr can never be 21 and
           last_enr can never be 134 -- confirm that this is intended. */
        do ii = 22 to 133 ; 
            if enr{ii-1} = 1 and enr{ii+1} = 1 then enr{ii} = 1 ;
            /* earliest quarter in 22..133 with enr = 1 */
            if enr{ii} = 1 and ii < first_enr then first_enr = ii ; 
            /* latest quarter with enr = 0 directly after a quarter with
               enr = 1; a missing enr does not satisfy enr = 0 */
            if enr{ii} = 0 and enr{ii-1} = 1 and ii > last_enr then last_enr = ii ; 
        end;

    run;
	
/* Quick check: list the pik of the first 20 EARN_MERGE observations. */
title 'Earn Merge';
proc print data=earn_merge (obs=20);
    var pik;
run;
 
/**********************************************
Using hash tables
**********************************************/

/* View that pairs every PHF earnings record with each enrollment key
   (pik, opeid, deglevl_code, state, degcip_2dig) of the same pik.

   The two inputs are interleaved BY pik, so within a pik the EARN_MERGE
   rows are read before the PHF rows.  EARN_MERGE rows are stored in a
   hash table; each PHF row is then output once per stored key, and the
   table is emptied at the end of the pik.

   NOTE(review): "[invalid piks]" in the WHERE= option appears to be a
   placeholder and is not valid SAS; the actual list of excluded piks has
   to be supplied before this step can run. */
data earnings_merge/view=earnings_merge ; 
        
    set earn_merge (where=(pik ne [invalid piks])
                    in=in_grads) 
        INPUTS.phf_interleave_b (in=in_phf rename=(state=ui_state)) ;
    by pik ;

    /* One-time setup: an empty hash table with the layout of EARN_MERGE
       (obs=0 loads no rows) and an iterator over it. */
    if _n_ = 1 then do ; 
        declare hash h_enr(dataset:'WORK.earn_merge (obs=0)',ordered:'Y');
        rc=h_enr.defineKey('pik','opeid','deglevl_code','state','degcip_2dig') ;
        rc=h_enr.defineData('pik','opeid','deglevl_code','state','degcip_2dig') ;
        rc=h_enr.defineDone() ;
        declare hiter i_enr('h_enr');
    end;
        
    /* arr_e: quarterly earnings read from the PHF record;
       nat_e / state_e: the copies written to the output */
    array arr_e(21:134) e21-e134 ;
    array nat_e(21:134) nat_e21-nat_e134 ; 
    array state_e(21:134) state_e21-state_e134 ; 
    
    /* enrollment row: remember its key for the PHF rows that follow */
    if in_grads then do ;
        rc=h_enr.add() ;
    end;
        
    /* earnings row: one output observation per stored enrollment key */
    if in_phf then do ;
        rc=i_enr.first() ;
        do while (rc=0) ;
            do ii = 21 to 134 ; 
                nat_e(ii) = arr_e(ii) ; 
                /* state_e is not reset between hash entries, so it must be
                   cleared explicitly when the states differ; otherwise the
                   in-state earnings of an earlier entry are carried over
                   to this one. */
                if ui_state = state then state_e(ii) = arr_e(ii) ;
                else state_e(ii) = . ;
            end;
            output;
            rc=i_enr.next() ;
        end;
     end;
                
     /* start the next pik with an empty table */
     if last.pik then do; 
        rc=h_enr.clear() ;
     end;
run;

/* Save the first 1000 rows of the view for inspection. */
data OUTPUTS.testfile;
    set earnings_merge (obs=1000);
run;

/* Sum the nat_e and state_e variables within each
   pik / opeid / deglevl_code / degcip_2dig combination. */
proc summary nway data=earnings_merge;
    class pik opeid deglevl_code degcip_2dig;
    var nat_e: state_e: ;
    output out=earn_totals (drop=_TYPE_ _FREQ_) sum= ;
run;

/* Order ALL by the same keys for the merge that follows. */
proc sort data=all;
    by pik opeid deglevl_code degcip_2dig;
run;
   
/***********************************
Putting the earnings data 
together with the new data
************************************/
/* Add the earnings totals to the records of ALL.  Records of ALL without
   totals are kept; totals without a record in ALL are discarded. */
data earntotals;
    merge all (in=in_all) earn_totals;
    by pik opeid deglevl_code degcip_2dig;

    if not in_all then delete;
run;

/* Store the wide file in key order. */
proc sort data=earntotals out=OUTPUTS.earntotals_wide;
    by pik opeid deglevl_code degcip_2dig;
run;
        
/* Reshape the wide file to one observation per input record and quarter
   (qtime) for the quarters startqtr through endqtr.  The macro variables
   startqtr and endqtr and the qtime macro are defined outside this step. */
data OUTPUTS.earntotals_long (drop= state_e:
                                    nat_e:
                              enr21-enr134 ii graduate); 
        set OUTPUTS.earntotals_wide ;
        by pik opeid deglevl_code degcip_2dig ; 
        

        /* keep only piks whose first character is '2' */
        %sample_restriction;

        array earn_st{%eval(&startqtr.):%eval(&endqtr.)}  state_e%eval(&startqtr.)-state_e%eval(&endqtr.) ;
        array earn_nat{%eval(&startqtr.):%eval(&endqtr.)}  nat_e%eval(&startqtr.)-nat_e%eval(&endqtr.) ;
            
        /* NOTE(review): enr is fixed at 21:134 while the loop below runs
           from startqtr to endqtr; a value outside 21..134 makes enr{ii}
           an out-of-range subscript. */
        array enr{21:134} enr21-enr134; 
        
        do ii = &startqtr. to &endqtr. ; 
            earn_national = earn_nat{ii} ;
            earn_state = earn_st{ii} ;             
            enrolled = enr{ii} ; 
            qtime = ii; 
            /* qtime 1-4 -> 1985, 5-8 -> 1986, and so on */
            year=int((qtime-1)/4)+1985 ;
            /* 1 when ii >= qtime_grad and graduate = 1, otherwise 0 */
            if ii >=qtime_grad and graduate = 1 then grad_dummy =1  ;
            else grad_dummy = 0 ; 
            /* presumably the qtime macro converts year and quarter of DOB
               to the qtime scale, making this age in whole years -- TODO confirm */
            age=floor((ii - %qtime(year(DOB),qtr(DOB),location=datastep))/4);
            /* debugging aid for the first 49 input observations.
               NOTE(review): this writes DOB to the log -- consider removing. */
            if _N_ < 50 then put age= DOB= ii= year= ; 
            /* keep quarters at most 20 past first_enr and at most 40 past last_enr */
            if qtime - first_enr =< 20 and qtime-last_enr <= 40 then output; 
        end; 
run; 
                
/* Sort the permanent long file in place by key and quarter. */
proc sort data=OUTPUTS.earntotals_long;
    by pik opeid deglevl_code degcip_2dig qtime;
run;

/* Work copy ordered by year and quarter within key, used for the
   annual totals. */
proc sort out=earntotals data=OUTPUTS.earntotals_long;
    by pik opeid deglevl_code degcip_2dig year qtime;
run;
    
/* Collapse the quarterly file to one observation per key and year:
   yearly earnings sums, an enrolled-this-year flag and running
   before / after enrollment flags. */
data earntotals_annual (drop = earn_national earn_state qtime enrolled);
    set earntotals;
    by pik opeid deglevl_code degcip_2dig year qtime;

    retain national_earn instate_earn enrolled_annual
           before_enrollment after_enrollment;

    /* new key: no enrollment seen yet */
    if first.degcip_2dig then do;
        before_enrollment = 1;
        after_enrollment  = 0;
    end;

    /* new year: restart the yearly accumulators */
    if first.year then do;
        national_earn   = 0;
        instate_earn    = 0;
        enrolled_annual = 0;
    end;

    /* sum statements: a missing quarterly value adds nothing */
    national_earn + earn_national;
    instate_earn  + earn_state;

    if enrolled = 1 then do;
        enrolled_annual   = 1;
        before_enrollment = 0;
    end;
    else if enrolled = 0 and before_enrollment = 0 then after_enrollment = 1;

    /* write one observation per year, at its last quarter */
    if last.year;
run;

/* Order by pik and year for the merge with self-employment earnings. */
proc sort data=earntotals_annual;
    by pik year;
run;
        
/* Reshape the wide self-employment file to one observation per pik and
   year, with flag_se and se_earnings taken from the columns of that year.
   starty and endy are macro variables defined outside this step. */
data seearn (drop=se_earn&starty.-se_earn&endy.)  ;
    set INPUTS.seearnings_wide ;
    by pik ; 
    
    array se_earn{&starty.:&endy.} se_earn&starty.-se_earn&endy.; 
    array s_emp{&starty.:&endy.} self_emp&starty.-self_emp&endy.;
        
    /* Loop over exactly the years the arrays are declared for.  The
       bounds were previously fixed at 2002 and 2016, which only matched
       the arrays for that particular setting of starty and endy. */
    do ii = &starty. to &endy. ; 
        flag_se = s_emp{ii} ;
        se_earnings = se_earn{ii} ;
        year = ii ;
        output ;
    end;
run;
        
/* Combine annual wage earnings with self-employment earnings by pik and
   year, derive total and log earnings, and keep ages 18 through 65.
   The yearly self_emp columns are dropped using the same starty / endy
   range with which they are declared in SEEARN (previously the range was
   fixed at 2002-2016). */
data allearnings_annual (rename =(national_earn = earn_national
                                  instate_earn = earn_instate
                                  enrolled_annual = enrolled)
                        drop=self_emp&starty.-self_emp&endy.) ;
        merge earntotals_annual (in=a) seearn (in=b) ;
        by pik year ; 

        /* only pik-years present in the annual earnings file */
        if a ; 
                                
        /* no self-employment record: treat as zero / not self-employed */
        if missing(se_earnings) then se_earnings = 0  ;
        if missing(flag_se) then flag_se = 0 ; 
        /* self-employed with zero national wage earnings */
        flag_onlyse = (flag_se=1 and national_earn = 0) ;
        
        earn_total = national_earn + se_earnings ; 
                                
        /* logs are left missing for non-positive earnings */
        if national_earn > 0 then logearn_national = log(national_earn) ;
        if instate_earn > 0 then logearn_state = log(instate_earn) ;
        if earn_total > 0 then logearn_total = log(earn_total) ;
        if age>=18 and age<=65; 
run;

/**************************
Merging in demographics
***************************/

/* View over the ICF that adds 0/1 indicator variables built from the
   coded sex, race, pob and ethnicity fields. */
data icf (keep = pik dob male race white black asian hispanic
                 pob us_native ethnicity) / view=icf;
    set ICF.icf_us;

    male      = (sex = 'M');
    white     = (race = '1');
    black     = (race = '2');
    asian     = (race = '3');
    us_native = (pob = 'A');
    hispanic  = (ethnicity = 'H');
run;

/* Attach the ICF variables to the annual earnings file; piks that are
   only in the ICF are discarded. */
data allearnings_annual_demog;
    merge allearnings_annual (in=in_earn) icf;
    by pik;

    if not in_earn then delete;
run;

/* Write the annual file to the Stata output directory. */
proc export data=allearnings_annual_demog
            outfile="&outstata/allearnings_cte_regressions.dta"
            replace;
run;
      

/* Attach the ICF variables to the quarterly long file; piks that are
   only in the ICF are discarded. */
data earnings_qtr_demog;
    merge OUTPUTS.earntotals_long (in=in_earn) icf;
    by pik;

    if not in_earn then delete;
run;

/* Write the quarterly file to the Stata output directory. */
proc export data=earnings_qtr_demog
            outfile="&outstata/qtrearnings_cte_regressions.dta"
            replace;
run;
                                
%mend earnmerge; 
    
/* Run the pipeline.  The macro uses names that must exist before this
   call: macro variables startqtr, endqtr, starty, endy and outstata,
   the qtime macro, and the librefs OUTPUTS, INPUTS and ICF. */
%earnmerge; 
